library(tidyverse)
library(knitr)
library(plotly) ; library(viridis) ; library(gridExtra) ; library(RColorBrewer)
library(biomaRt)
library(Rtsne)
library(glmnet) ; library(ROCR) ; library(car)
library(corrplot)
library(expss) ; library(knitr)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Background

In 20_04_07_LR.html we performed a logistic regression but we discovered that the features were strongly correlated. This inflates the standard error of the coefficients, making them no longer interpretable.

# Clusterings
clustering_selected = 'DynamicHybridMergedSmall'
clusterings = read_csv('./../Data/clusters.csv')
clusterings$Module = clusterings[,clustering_selected] %>% data.frame %>% unlist %>% unname

assigned_module = data.frame('ID' = clusterings$ID, 'Module' = clusterings$Module)

# Original dataset
original_dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'), row.names=1)

# Model dataset
load('./../Data/LR_model.RData')

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org')
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=rownames(dataset), mart=mart)

Variance Inflation Factor (VIF) and correlation plot

# VIF
car::vif(fit) %>% sort
##       absGS       MTcor   MM.00BCD8   MM.00BFC4   MM.B79F00   MM.E76BF3 
##    1.120003    3.322214    5.760965   11.977660   28.704800   30.453238 
##   MM.619CFF   MM.E58700   MM.D89000   MM.FF67A4   MM.39B600   MM.A3A500 
##   40.019097   43.341694   48.009397   55.717881   74.444993   90.507830 
##          GS   MM.00BF7D   MM.8AAB00   MM.00A7FF   MM.F8766D   MM.00BD5F 
##   96.406577  104.269895  124.383852  132.186820  185.578598  196.779668 
##   MM.EF7F49   MM.FF62BC   MM.B983FF   MM.FD61D1   MM.FE6E8A   MM.C99800 
##  217.997495  244.733566  267.532403  289.004766  293.298020  311.423166 
##   MM.9590FF   MM.00B7E9   MM.D376FF   MM.00BA38   MM.00B0F6   MM.00C0AF 
##  345.026524  354.944339  367.784988  397.850985  423.554841  476.163259 
##   MM.6BB100   MM.F564E3   MM.00C097 
##  494.468875  550.127905 1003.865351
# Correlation plot
corrplot.mixed(cor(train_set[,-ncol(train_set)]), lower = 'number', lower.col = 'gray', number.cex = .6, tl.pos = 'l', tl.col = '#666666')

rm(getinfo, mart, clusterings)

Possible solutions to Multicollinearity:


  1. Remove all variables with a VIF>10: We would lose all but three of our variables, not ideal

  2. Do Principal Component Regression: We would lose the relation between the prediction and the original features, which would be interesting to study

  3. Don’t do anything: Multicollinearity affects the coefficients and p-values of the regression, but it doesn’t affect the predictions, precision of the predictions or the goodness-of-fit statistics ref, but as with the previous option, we cannot study the coefficients of the regression

  4. Use Ridge Regression


Ridge Regression


Using the same train and test sets created in 20_04_07_LR.html


Train model

train_set$SFARI = train_set$SFARI %>% as.factor

x = train_set[,-ncol(train_set)] %>% as.matrix
y = train_set$SFARI
lambda_seq = 10^seq(1, -4, by = -.1)

fit = cv.glmnet(x, y, alpha = 0, lambda  = lambda_seq, family = 'binomial')

best_lambda = fit$lambda.min
cat(paste0('The best model has a lambda of ', best_lambda))
## The best model has a lambda of 0.000398107170553497
best_fit = glmnet(x, y, alpha=0, lambda=best_lambda, family='binomial')


rm(x, y, lambda_seq, fit)

Predict labels in test set

test_set$prob = predict(best_fit, newx = test_set[,1:(ncol(train_set)-1)] %>% as.matrix, s=best_lambda, type='response') %>% as.vector
test_set$pred = test_set$prob>0.5


Performance metrics


Confusion matrix

conf_mat = test_set %>% apply_labels(SFARI = 'Actual Labels', 
                                     prob = 'Assigned Probability', 
                                     pred = 'Label Prediction')

cro(conf_mat$SFARI, list(conf_mat$pred, total()))
 Label Prediction     #Total 
 FALSE   TRUE   
 Actual Labels 
   FALSE  1973 1054   3027
   TRUE  75 102   177
   #Total cases  2048 1156   3204
rm(conf_mat)

Accuracy

acc = mean(test_set$SFARI==test_set$pred)

print(paste0('Accuracy = ', round(acc,4)))
## [1] "Accuracy = 0.6476"
rm(acc)

ROC Curve

pred_ROCR = prediction(test_set$prob, test_set$SFARI)

roc_ROCR = performance(pred_ROCR, measure='tpr', x.measure='fpr')
AUC = performance(pred_ROCR, measure='auc')@y.values[[1]]

plot(roc_ROCR, main=paste0('ROC curve (AUC=',round(AUC,2),')'), col='#009999')
abline(a=0, b=1, col='#666666')

Lift Curve

lift_ROCR = performance(pred_ROCR, measure='lift', x.measure='rpp')
plot(lift_ROCR, main='Lift curve', col='#86b300')

rm(pred_ROCR, roc_ROCR, AUC, lift_ROCR)




Coefficients


  • There is apositive relation between the coefficient assigned to the membership of each module and the percentage of SFARI genes that are assigned to that module

  • MTcor has a low coefficient and Gene Significance has a negative coefficient

gene_corr_info = dataset %>% mutate('ID' = rownames(dataset)) %>% dplyr::select(ID, MTcor, SFARI) %>% left_join(assigned_module, by ='ID') %>%
                 mutate(Module = gsub('#','',Module))

coef_info = best_fit$beta %>% as.matrix %>% data.frame %>% dplyr::rename('coef' = s0) %>% mutate('feature' = gsub('MM.','',rownames(.))) %>% 
            left_join(gene_corr_info, by = c('feature' = 'Module')) %>% dplyr::select(feature, coef, MTcor, SFARI) %>% 
            group_by(feature, coef, MTcor) %>%  summarise('SFARI_perc' = mean(SFARI)) %>% arrange(desc(coef))

kable(coef_info %>% dplyr::select(feature, coef) %>% rename('Feature' = feature, 'Coefficient' = coef),
      align = 'cc', caption = 'Regression Coefficients')
Regression Coefficients
Feature Coefficient
00C0AF 3.0134647
00B0F6 2.6614085
00BA38 2.3979316
00A7FF 1.5304176
EF7F49 1.2506776
D89000 1.1825466
6BB100 1.0500587
00BF7D 0.8185542
39B600 0.5840582
FF62BC 0.5659054
00BCD8 0.2370433
9590FF 0.2124415
00BD5F 0.1826915
00BFC4 0.1129318
MTcor 0.0941246
E58700 0.0169279
C99800 0.0072088
619CFF -0.0194866
absGS -0.1159901
D376FF -0.1829182
B79F00 -0.2707506
F564E3 -0.4203478
GS -0.4583694
00C097 -0.4896129
A3A500 -0.6796896
FE6E8A -0.8069686
FF67A4 -1.0024392
E76BF3 -1.0133888
F8766D -1.1433751
B983FF -1.1490719
8AAB00 -1.1501594
FD61D1 -1.5083219
00B7E9 -2.6675467
ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
              ggplot(aes(coef, SFARI_perc)) +  geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) + 
              geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) + 
              theme_minimal() + xlab('Coefficient') + 
              ylab('% of SFARI genes in Module'))
  • There is not an observable relation between the coefficient and the correlation of the module and the diagnosis. This is not a surprise since we knew that there is a negative relation between SFARI genes and Module-Diagnosis correlation from Preprocessing/Gandal/AllRegions/RMarkdowns/20_04_03_WGCNA_modules_EA.html. The fact that the relation between coefficient and Module-Diagnosis correlation is not negative could even be a good sign that the model is picking some biological signal
ggplotly(coef_info %>% dplyr::rename('Module' = feature) %>% filter(!is.na(MTcor)) %>%
              ggplot(aes(coef, MTcor)) +  geom_smooth(method = 'lm', color = 'gray', alpha = 0.1) + 
              geom_point(aes(id = Module), color = paste0('#',coef_info$feature[!is.na(coef_info$MTcor)])) + 
              theme_minimal() + xlab('Coefficient') + 
              ylab('Module-Diagnosis correlation'))




Analyse model


SFARI genes have a slightly higher score distribution than the rest

plot_data = test_set %>% dplyr::select(prob, SFARI)

plot_data %>% ggplot(aes(prob, fill=SFARI, color=SFARI)) + geom_density(alpha=0.3) + xlab('Score') +
              theme_minimal() + ggtitle('Model score distribution by SFARI Label')

  • There seems to be a positive relation between the SFARI scores and the probability assigned by the model

  • The number of observations when separating the test set by SFARI score is quite small, so this is not a robust result, specially for scores 1, 2 and 6

plot_data = test_set %>% mutate(ID=rownames(test_set)) %>% dplyr::select(ID, prob) %>%
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID') %>%
            dplyr::select(ID, prob, gene.score) %>% apply_labels(gene.score='SFARI Gene score')

cro(plot_data$gene.score)
 #Total 
 SFARI Gene score 
   1  5
   2  12
   3  38
   4  86
   5  32
   6  4
   None  3027
   #Total cases  3204
mean_vals = plot_data %>% group_by(gene.score) %>% summarise(mean_prob = mean(prob))

ggplotly(plot_data %>% ggplot(aes(gene.score, prob, fill=gene.score)) + geom_boxplot() + 
              scale_fill_manual(values=SFARI_colour_hue(r=c(1:6,8,7))) + 
              ggtitle('Distribution of probabilities by SFARI score') +
              xlab('SFARI score') + ylab('Probability') + theme_minimal())
rm(mean_vals)

Genes with highest scores in test set


  • Considering the class imbalance in the train set, there are many SFARI scores in here

  • There’s not a single gene with a SFARI score of 5 or 6

test_set %>% dplyr::select(prob, SFARI) %>% mutate(ID = rownames(test_set)) %>% 
             arrange(desc(prob)) %>% top_n(50, wt=prob) %>%
             left_join(original_dataset %>% mutate(ID=rownames(original_dataset)), by='ID')  %>% 
             left_join(gene_names, by = c('ID'='ensembl_gene_id')) %>%
             dplyr::rename('GeneSymbol' = external_gene_id, 'Probability' = prob, 'ModuleDiagnosis_corr' = MTcor, 'GeneSignificance' = GS) %>%
             mutate(ModuleDiagnosis_corr = round(ModuleDiagnosis_corr,4), Probability = round(Probability,4), 
                    GeneSignificance = round(GeneSignificance,4)) %>%
             dplyr::select(GeneSymbol, GeneSignificance, ModuleDiagnosis_corr, Module, Probability, gene.score) %>%
             kable(caption = 'Genes with highest model probabilities from the test set')
Genes with highest model probabilities from the test set
GeneSymbol GeneSignificance ModuleDiagnosis_corr Module Probability gene.score
ARID1B 0.2711 0.1127 #FF62BC 0.8711 1
KMT2A 0.1347 0.7916 #00C097 0.8511 1
TLN2 -0.4783 -0.9514 #00C0AF 0.8504 None
EP400 -0.1671 -0.6031 #00BA38 0.8419 3
NAV1 -0.1734 -0.6031 #00BA38 0.8409 None
SCN2A -0.5375 -0.9514 #00C0AF 0.8397 1
PDS5B -0.4463 -0.9514 #00C0AF 0.8353 None
ATN1 -0.2052 -0.6031 #00BA38 0.8351 None
BAZ2A 0.0534 -0.0094 #00A7FF 0.8314 None
EIF4G3 -0.4827 -0.8040 #00B7E9 0.8312 None
BICD1 -0.0390 0.0586 #FE6E8A 0.8299 None
DAAM1 0.1579 -0.0094 #00A7FF 0.8279 None
RIMBP2 0.2615 -0.0094 #00A7FF 0.8200 None
PTPRS -0.3769 -0.2526 #F564E3 0.8199 None
NFIC -0.4414 -0.6031 #00BA38 0.8190 None
DLGAP1 -0.7790 -0.9514 #00C0AF 0.8158 3
PLXNC1 -0.0088 -0.0094 #00A7FF 0.8143 None
AFF3 -0.3217 0.0586 #FE6E8A 0.8138 None
SCAF4 0.0185 -0.6031 #00BA38 0.8126 None
USP32 -0.6248 -0.6031 #00BA38 0.8123 None
FRMPD4 -0.6526 -0.9514 #00C0AF 0.8110 None
CERS6 -0.4540 -0.9514 #00C0AF 0.8106 None
PPP1R12B -0.0838 -0.6031 #00BA38 0.8099 None
GRIN2A -0.5289 -0.9514 #00C0AF 0.8096 3
WNK1 -0.0192 0.2213 #A3A500 0.8089 None
JAZF1 -0.3136 -0.9514 #00C0AF 0.8080 None
MEF2D -0.4172 -0.6031 #00BA38 0.8067 None
RAB7A -0.4020 -0.6031 #00BA38 0.8065 None
KIAA0226 -0.3510 -0.6031 #00BA38 0.8064 None
PAK2 0.2407 0.1127 #FF62BC 0.8048 3
ZNF385A -0.3231 -0.6031 #00BA38 0.8045 None
MAP3K13 -0.4321 -0.6750 #D376FF 0.8032 None
KMT2E 0.0723 0.7916 #00C097 0.8018 3
UBAP2L -0.3318 -0.6031 #00BA38 0.8015 None
MARK4 -0.4207 -0.6031 #00BA38 0.7993 None
AFAP1 -0.1691 0.1127 #FF62BC 0.7993 None
PRPF8 -0.5452 -0.6031 #00BA38 0.7972 None
RASAL2 -0.1627 -0.4891 #00BF7D 0.7972 None
TET3 0.4863 0.7287 #39B600 0.7971 None
KSR2 -0.4594 -0.6031 #00BA38 0.7971 None
SLC32A1 -0.5797 -0.6031 #00BA38 0.7933 None
BRD4 0.0563 -0.6031 #00BA38 0.7918 4
GRIK5 -0.3145 -0.6031 #00BA38 0.7913 3
TMEM178B -0.4282 -0.9514 #00C0AF 0.7912 None
UBLCP1 -0.4921 -0.9514 #00C0AF 0.7899 None
ZNF275 -0.0260 -0.6031 #00BA38 0.7893 None
NOVA2 -0.5467 -0.6031 #00BA38 0.7874 None
SLC8A2 -0.0972 -0.2526 #F564E3 0.7874 None
UBXN7 0.1040 0.1127 #FF62BC 0.7866 None
SEZ6L -0.3794 -0.9514 #00C0AF 0.7842 None



Negative samples distribution


Selecting the Negative samples in the test set

negative_set = dataset %>% filter(!SFARI & !rownames(.) %in% rownames(train_set)) %>% dplyr::select(-SFARI)
rownames(negative_set) = rownames(dataset)[!dataset$SFARI & !rownames(dataset) %in% rownames(train_set)]

negative_set$prob = predict(best_fit, newx=negative_set %>% as.matrix, s=best_lambda, type='response') %>% as.vector
negative_set$pred = negative_set$prob>0.5

negative_set_table = negative_set %>% apply_labels(prob = 'Assigned Probability', 
                                                   pred = 'Label Prediction')
cro(negative_set_table$pred)
 #Total 
 Label Prediction 
   FALSE  8102
   TRUE  4155
   #Total cases  12257
cat(paste0('\n', sum(negative_set$pred), ' genes are predicted as ASD-related'))
## 
## 4155 genes are predicted as ASD-related
negative_set %>% ggplot(aes(prob)) + geom_density(color='#F8766D', fill='#F8766D', alpha=0.5) +
                 geom_vline(xintercept=0.5, color='#333333', linetype='dotted') + 
                 ggtitle('Probability distribution of the Negative samples in the test set') + 
                 theme_minimal()


Probability and Gene Significance


There’s a lot of noise, but the genes with the highest probabilities have higher (absolute) Gene Significance

negative_set %>% ggplot(aes(prob, GS, color=MTcor)) + geom_point() + geom_smooth(method='loess', color='#666666') +
                 geom_hline(yintercept=0, color='gray', linetype='dashed') +
                 scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) + 
                 ggtitle('Relation between Probability and Gene Significance') + theme_minimal()

negative_set %>% ggplot(aes(prob, abs(GS), color=MTcor)) + geom_point() + 
                 geom_hline(yintercept=mean(negative_set$absGS), color='gray', linetype='dashed') + 
                 geom_smooth(method='loess', color='#666666') +
                 scale_color_gradientn(colours=c('#F8766D','white','#00BFC4')) + 
                 ggtitle('Relation between Model probability and Gene Significance') + theme_minimal()


Probability and Module-Diagnosis correlation


On average, the model seems to be assigning a probability inversely proportional to the Module-Diagnosis correlation of the module, with the highest positively correlated modules having the lowest average probability and the highest negatively correlated modules the highest average probability. But the difference isn’t big

negative_set %>% ggplot(aes(MTcor, prob, color=GS)) + geom_point() + geom_smooth(method='loess', color='#666666') + 
                 geom_hline(yintercept=mean(negative_set$prob), color='gray', linetype='dashed') +
                 scale_color_gradientn(colours=c('#F8766D','#F8766D','white','#00BFC4','#00BFC4')) + 
                 xlab('Modules ordered by their correlation to ASD') + ylab('Model probability') +
                 theme_minimal()

Summarised version, plotting by module instead of by gene

The difference in the trend lines between this plot and the one above is that the one above takes all the points into consideration while this considers each module as an observation by itself, so the top one is strongly affected by big modules and the bottom one treats all modules the same

The model seems to give higher probabilities to genes belonging to modules with a small (absolute) correlation to Diagnosis, although the difference isn’t much

plot_data = negative_set %>% group_by(MTcor) %>% summarise(mean = mean(prob), sd = sd(prob), n = n()) %>%
            mutate(MTcor_sign = ifelse(MTcor>0, 'Positive', 'Negative')) %>% left_join(original_dataset, by='MTcor') %>%
            dplyr::select(Module, MTcor, MTcor_sign, mean, sd, n) %>% distinct()
colnames(plot_data)[1] = 'ID'

ggplotly(plot_data %>% ggplot(aes(MTcor, mean, size=n, color=MTcor_sign)) + geom_point(aes(id=ID)) + 
         geom_smooth(method='loess', color='gray', se=FALSE) + geom_smooth(method='lm', se=FALSE) + 
         xlab('Module-Diagnosis correlation') + ylab('Mean Probability by Model') + theme_minimal())


Probability and level of expression


There is a positive relation between level of expression and probability, the model seems to be capturing indirectly the level of expression of the genes to make the prediction, so it’s introducing the same bias

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame
mean_and_sd = data.frame(ID=rownames(datExpr), meanExpr=rowMeans(datExpr), sdExpr=apply(datExpr,1,sd))

plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>% left_join(mean_and_sd, by='ID') %>% 
            left_join(original_dataset %>% mutate(ID=rownames(original_dataset)) %>% 
                      dplyr::select(ID, Module), by='ID')
colnames(plot_data)[ncol(plot_data)] = 'Module'

plot_data %>% ggplot(aes(meanExpr, prob)) + geom_point(alpha=0.2, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.3) + 
              geom_smooth(method='lm', color='#999999', se=FALSE, alpha=1) + 
              theme_minimal() + ggtitle('Mean expression vs model probability by gene')

rm(mean_and_sd)
plot_data2 = plot_data %>% group_by(Module) %>% summarise(meanExpr = mean(meanExpr), meanProb = mean(prob), n=n())

ggplotly(plot_data2 %>% ggplot(aes(meanExpr, meanProb, size=n)) + geom_point(color=plot_data2$Module) + 
         geom_smooth(method='loess', se=TRUE, color='gray', alpha=0.1, size=0.7) + 
         geom_smooth(method='lm', se=FALSE, color='gray') + theme_minimal() + theme(legend.position='none') + 
         ggtitle('Mean expression vs model probability by Module'))
rm(plot_data2)


Probability and lfc


There is a relation between probability and lfc, so it IS capturing a bit of true information (because lfc and mean expression were negatively correlated and it still has a positive relation in the model)

  • The relation is stronger in genes under-expressed in ASD
plot_data = negative_set %>% mutate(ID=rownames(negative_set)) %>% 
            left_join(DE_info %>% mutate(ID=rownames(DE_info)), by='ID')

plot_data %>% ggplot(aes(log2FoldChange, prob)) + geom_point(alpha=0.1, color='#0099cc') + 
              geom_smooth(method='loess', color='gray', alpha=0.3) + 
              theme_minimal() + ggtitle('lfc vs model probability by gene')

  • The relation is stronger in Differentially Expressed genes
plot_data %>% mutate(DE = padj<0.05) %>% ggplot(aes(log2FoldChange, prob, color=DE)) + geom_point(alpha=0.1) + 
              geom_smooth(method='loess', alpha=0.3) + 
              theme_minimal() + ggtitle('lfc vs model probability by gene')



Conclusion


The model is capturing the mean level of expression of the genes (indirectly through module memberhsip), which is a strong bias found in the SFARI scores, but it seems to be capturing a bit of true biological signal as well (based on the GS and the log fold change plots)


Saving results

save(train_set, test_set, negative_set, best_fit, best_lambda, dataset, file='./../Data/Ridge_model.RData')




Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] expss_0.10.2       corrplot_0.84      car_3.0-7          carData_3.0-3     
##  [5] ROCR_1.0-7         gplots_3.0.3       glmnet_3.0-2       Matrix_1.2-18     
##  [9] Rtsne_0.15         biomaRt_2.40.5     RColorBrewer_1.1-2 gridExtra_2.3     
## [13] viridis_0.5.1      viridisLite_0.3.0  plotly_4.9.2       knitr_1.28        
## [17] forcats_0.5.0      stringr_1.4.0      dplyr_0.8.5        purrr_0.3.3       
## [21] readr_1.3.1        tidyr_1.0.2        tibble_3.0.0       ggplot2_3.3.0     
## [25] tidyverse_1.3.0   
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1                backports_1.1.5            
##   [3] Hmisc_4.4-0                 lazyeval_0.2.2             
##   [5] splines_3.6.3               BiocParallel_1.18.1        
##   [7] crosstalk_1.1.0.1           GenomeInfoDb_1.20.0        
##   [9] digest_0.6.25               foreach_1.5.0              
##  [11] htmltools_0.4.0             gdata_2.18.0               
##  [13] fansi_0.4.1                 magrittr_1.5               
##  [15] checkmate_2.0.0             memoise_1.1.0              
##  [17] cluster_2.1.0               openxlsx_4.1.4             
##  [19] annotate_1.62.0             modelr_0.1.6               
##  [21] matrixStats_0.56.0          prettyunits_1.1.1          
##  [23] jpeg_0.1-8.1                colorspace_1.4-1           
##  [25] blob_1.2.1                  rvest_0.3.5                
##  [27] haven_2.2.0                 xfun_0.12                  
##  [29] crayon_1.3.4                RCurl_1.98-1.1             
##  [31] jsonlite_1.6.1              genefilter_1.66.0          
##  [33] survival_3.1-11             iterators_1.0.12           
##  [35] glue_1.3.2                  gtable_0.3.0               
##  [37] zlibbioc_1.30.0             XVector_0.24.0             
##  [39] DelayedArray_0.10.0         shape_1.4.4                
##  [41] BiocGenerics_0.30.0         abind_1.4-5                
##  [43] scales_1.1.0                DBI_1.1.0                  
##  [45] Rcpp_1.0.4                  xtable_1.8-4               
##  [47] progress_1.2.2              htmlTable_1.13.3           
##  [49] foreign_0.8-75              bit_1.1-15.2               
##  [51] Formula_1.2-3               stats4_3.6.3               
##  [53] htmlwidgets_1.5.1           httr_1.4.1                 
##  [55] acepack_1.4.1               ellipsis_0.3.0             
##  [57] pkgconfig_2.0.3             XML_3.99-0.3               
##  [59] farver_2.0.3                nnet_7.3-13                
##  [61] dbplyr_1.4.2                locfit_1.5-9.4             
##  [63] tidyselect_1.0.0            labeling_0.3               
##  [65] rlang_0.4.5                 AnnotationDbi_1.46.1       
##  [67] munsell_0.5.0               cellranger_1.1.0           
##  [69] tools_3.6.3                 cli_2.0.2                  
##  [71] generics_0.0.2              RSQLite_2.2.0              
##  [73] broom_0.5.5                 evaluate_0.14              
##  [75] yaml_2.2.1                  bit64_0.9-7                
##  [77] fs_1.4.0                    zip_2.0.4                  
##  [79] caTools_1.18.0              nlme_3.1-144               
##  [81] xml2_1.2.5                  compiler_3.6.3             
##  [83] rstudioapi_0.11             curl_4.3                   
##  [85] png_0.1-7                   reprex_0.3.0               
##  [87] geneplotter_1.62.0          stringi_1.4.6              
##  [89] highr_0.8                   lattice_0.20-40            
##  [91] vctrs_0.2.4                 pillar_1.4.3               
##  [93] lifecycle_0.2.0             data.table_1.12.8          
##  [95] bitops_1.0-6                GenomicRanges_1.36.1       
##  [97] R6_2.4.1                    latticeExtra_0.6-29        
##  [99] KernSmooth_2.23-16          rio_0.5.16                 
## [101] IRanges_2.18.3              codetools_0.2-16           
## [103] gtools_3.8.2                assertthat_0.2.1           
## [105] SummarizedExperiment_1.14.1 DESeq2_1.24.0              
## [107] withr_2.1.2                 S4Vectors_0.22.1           
## [109] GenomeInfoDbData_1.2.1      mgcv_1.8-31                
## [111] parallel_3.6.3              hms_0.5.3                  
## [113] grid_3.6.3                  rpart_4.1-15               
## [115] rmarkdown_2.1               Biobase_2.44.0             
## [117] lubridate_1.7.4             base64enc_0.1-3